from keras.utils import np_utils
import numpy as np
np.random.seed(10)
from keras.datasets import mnist
# Preprocessing
# Load MNIST as (train images, train labels), (test images, test labels).
# The unpacking has to be one statement: when it is split over two lines the
# first line is evaluated on its own (names not yet defined) and the second
# line binds the whole train pair / test pair to the wrong variables.
(x_train_image, y_train_label), (x_test_image, y_test_label) = mnist.load_data()
# Flatten every image into a 784-element float32 vector; -1 lets NumPy infer
# the number of samples instead of hard-coding 60000 / 10000.
x_Train = x_train_image.reshape(-1, 784).astype('float32')
x_Test = x_test_image.reshape(-1, 784).astype('float32')
# Scale pixel values by 1/255 (MNIST pixels are 8-bit, so this maps to [0, 1]).
x_Train_normalize = x_Train / 255
x_Test_normalize = x_Test / 255
# One-hot encode the integer class labels for categorical cross-entropy.
y_Train_OneHot = np_utils.to_categorical(y_train_label)
y_Test_OneHot = np_utils.to_categorical(y_test_label)
# Build the model
from keras.models import Sequential
from keras.layers import Dense

# A 784 -> 256 (ReLU) -> 10 (softmax) fully connected network; both layers
# use the 'normal' kernel initializer.
model = Sequential([
    Dense(
        units=256,
        input_dim=784,
        kernel_initializer='normal',
        activation='relu',
    ),
    Dense(
        units=10,
        kernel_initializer='normal',
        activation='softmax',
    ),
])
# Uncomment to inspect the layer stack:
# print(model.summary())

# Training
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# Hold out 20% of the training data for validation; 10 epochs, batches of 200.
train_history = model.fit(
    x=x_Train_normalize,
    y=y_Train_OneHot,
    batch_size=200,
    epochs=10,
    validation_split=0.2,
    verbose=2,
)
# Define show_train_history to visualise the training process.
import matplotlib.pyplot as plt


def show_train_history(train_history, train, validation):
    """Plot one training metric and its validation counterpart per epoch.

    Args:
        train_history: Object whose ``history`` mapping is indexed by the two
            keys below (here, the value returned by ``model.fit``).
        train: Key of the training-set series in ``train_history.history``;
            also used as the y-axis label.
        validation: Key of the validation-set series in
            ``train_history.history``.
    """
    plt.plot(train_history.history[train])
    plt.plot(train_history.history[validation])
    plt.title('Train History')  # figure title
    plt.ylabel(train)  # y-axis shows the metric name
    plt.xlabel('Epoch')  # x-axis is the epoch index
    # Legend order matches the order of the two plot() calls above.
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()


# Plot the accuracy curves.
# NOTE: the original author saw the key spelled 'acc' on Windows and
# 'accuracy' on macOS; this is most likely a difference between Keras
# versions rather than operating systems - check train_history.history.keys().
# show_train_history(train_history,'accuracy','val_accuracy')
# Plot the loss curves.
# show_train_history(train_history,'loss','val_loss')
# Evaluate the trained model on the normalized test features and one-hot
# test labels; the returned values are kept in `scores`.
scores = model.evaluate(
    x=x_Test_normalize,
    y=y_Test_OneHot,
)
print()
# Show the accuracy (second entry of `scores`, after the loss).
print('accuracy', scores[1])
# Run prediction on the test set.
# Feed the *normalized* features: the model was trained and evaluated on
# inputs scaled by 1/255, so predicting on the raw x_Test would not match the
# accuracy reported by model.evaluate.
# Sequential.predict_classes() no longer exists in recent TensorFlow/Keras
# releases; taking the argmax over the softmax outputs yields the same
# class indices and works on old and new versions alike.
prediction = np.argmax(model.predict(x_Test_normalize), axis=-1)
# Bare expression: displays the array when run as a notebook cell.
prediction
# NOTE(review): plot_images_labels_prediction is not defined in the visible
# part of this file - it must be defined or imported before this line runs.
plot_images_labels_prediction(x_test_image, y_test_label, prediction, idx=340)
輸出:
# Build a confusion matrix with pandas.
import pandas as pd

# Cross-tabulate true test labels (rows, named 'label') against predicted
# classes (columns, named 'predict').
pd.crosstab(
    index=y_test_label,
    columns=prediction,
    rownames=['label'],
    colnames=['predict'],
)
輸出:
對角線上的數值是預測正確的筆數,其餘位置則是誤判的筆數;可以看到5和3最容易搞混
# Build a DataFrame pairing each true label with its predicted class.
df = pd.DataFrame(
    {
        'label': y_test_label,
        'predict': prediction,
    }
)
# Peek at the first two rows.
df.head(2)
輸出:
# With df built, select the rows whose true label is 5 but whose
# prediction is 3.
df.loc[(df['label'] == 5) & (df['predict'] == 3)]
輸出:
可以看到真實值是5、預測卻是3的資料有第340、1003、1393筆等
# Next, look at which images were misclassified.
# The original line was a usage template (a prose placeholder for idx and a
# positional argument after a keyword argument), which is a SyntaxError.
# idx=340 is the first label-5 / predicted-3 row reported in the text above;
# replace it with any other row index of interest.
# NOTE(review): `num` is presumably the number of images to display -
# confirm against the definition of plot_images_labels_prediction, which is
# not visible in this file.
plot_images_labels_prediction(x_test_image, y_test_label, prediction, idx=340, num=1)
可以看到都是一些被寫得很像3的5